[Skip for now]
filetable <- read.table(file="/Users/shubhamagrawal/Documents/work/compressed/DIA/fileTable.txt", header = TRUE)
mae <- readExperimentDIA(fileTable = filetable,
annotation_col = c("treatment", "timepoint", "replicate",
"sampleType"))
mae <- readRDS("data/maeObjNEW.Rds")
mae
## A MultiAssayExperiment object of 2 listed
## experiments with user-defined names and respective classes.
## Containing an ExperimentList class object of length 2:
## [1] Phosphoproteome: SummarizedExperiment with 40517 rows and 138 columns
## [2] Proteome: SummarizedExperiment with 8518 rows and 79 columns
## Functionality:
## experiments() - obtain the ExperimentList instance
## colData() - the primary/phenotype DataFrame
## sampleMap() - the sample coordination DataFrame
## `$`, `[`, `[[` - extract colData columns, subset, or experiment
## *Format() - convert into a long or wide DataFrame
## assays() - convert ExperimentList to a SimpleList of matrices
## exportClass() - save data to flat files
se <- mae[["Phosphoproteome"]]
colData(se) <- colData(mae[, colnames(se)])
se
## class: SummarizedExperiment
## dim: 40517 138
## metadata(0):
## assays(1): Intensity
## rownames(40517): s1 s2 ... s40516 s40517
## rowData names(6): UniprotID Gene ... Sequence site
## colnames(138): FullProteome_1stCtrl_0min_rep2
## FullProteome_1stCtrl_0min_rep3 ... Phospho_HGF_40min_rep1
## Phospho_HGF_6h_rep1
## colData names(6): treatment timepoint ... sample sampleName
plotIntensity(se[, se$sampleType == "Phospho"], color = "replicate") + theme_classic() +
theme(
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0, size = 7),
plot.title = element_text(hjust = 0.5, face = "bold")
)
newSE <- preprocessPhos(seData = se, transform = "log2",
normalize = TRUE, impute = "QRILC")
## Warning in fun(libname, pkgname): mzR has been built against a different Rcpp version (1.0.12)
## than is installed on your system (1.1.0). This might lead to errors
## when loading mzR. If you encounter such issues, please send a report,
## including the output of sessionInfo() to the Bioc support forum at
## https://support.bioconductor.org/. For details see also
## https://github.com/sneumann/mzR/wiki/mzR-Rcpp-compiler-linker-issue.
## Imputing along margin 2 (samples/columns).
newSE
## class: SummarizedExperiment
## dim: 13081 59
## metadata(0):
## assays(2): Intensity imputed
## rownames(13081): s1 s4 ... s40511 s40514
## rowData names(6): UniprotID Gene ... Sequence site
## colnames(59): Phospho_1stCtrl_0min_rep2 Phospho_1stCtrl_0min_rep3 ...
## Phospho_HGF_40min_rep1 Phospho_HGF_6h_rep1
## colData names(6): treatment timepoint ... sample sampleName
plotIntensity(newSE, color = "replicate") + theme_classic() +
theme(
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0, size = 7),
plot.title = element_text(hjust = 0.5, face = "bold")
)
plotMissing(newSE) + theme_classic() +
theme(
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0, size = 7),
plot.title = element_text(hjust = 0.5, face = "bold")
)
pca <- stats::prcomp(t(assays(newSE)[["imputed"]]), center = TRUE, scale. = TRUE)
p <- plotPCA(pca = pca, se = newSE,
color = "treatment",
shape = "replicate")
p$layers[[1]]$aes_params$size <- 3
p$layers[[1]]$aes_params$stroke <- 1.2
p + coord_equal() + theme(aspect.ratio = 1, legend.position = "right",
axis.text = element_text(color = "#000000")) +
guides(
shape = guide_legend(order = 1),
color = guide_legend(order = 2)
)
newSE <- preprocessPhos(seData = se, transform = "log2",
normalize = TRUE, impute = "QRILC",
removeOutlier = "rep1")
## Imputing along margin 2 (samples/columns).
newSE
## class: SummarizedExperiment
## dim: 17035 32
## metadata(0):
## assays(2): Intensity imputed
## rownames(17035): s1 s4 ... s40513 s40514
## rowData names(6): UniprotID Gene ... Sequence site
## colnames(32): Phospho_1stCtrl_0min_rep2 Phospho_1stCtrl_0min_rep3 ...
## Phospho_HGF_40min_rep2 Phospho_HGF_40min_rep3
## colData names(6): treatment timepoint ... sample sampleName
pca <- stats::prcomp(t(assays(newSE)[["imputed"]]),
center = TRUE, scale. = TRUE)
p <- plotPCA(pca = pca, se = newSE,
color = "treatment",
shape = "replicate")
p$layers[[1]]$aes_params$size <- 3
p$layers[[1]]$aes_params$stroke <- 1.2
p + coord_equal() + theme(aspect.ratio = 1, legend.position = "right",
axis.text = element_text(color = "#000000")) +
guides(
shape = guide_legend(order = 1),
color = guide_legend(order = 2)
)
p <- plotHeatmap(type = "Top variant", newSE, top = 30, annotationCol = c("replicate", "treatment"))
g <- p$gtable
# Find row names grob
row_names <- which(g$layout$name == "row_names")
g$grobs[[row_names]]$gp <- gpar(fontsize = 8, fontface = "bold")
# Same for columns
col_names <- which(g$layout$name == "col_names")
g$grobs[[col_names]]$gp <- gpar(fontsize = 10)
grid.newpage()
grid.draw(g)
dea <- performDifferentialExp(se = newSE,
assay = "imputed",
method = "limma",
condition = "treatment",
reference = "2ndCtrl",
target = "HGF")
p <- ggplot(dea$resDE, aes(x = pvalue)) +
geom_histogram(fill = "grey", col = "blue", alpha=0.7) +
ggtitle("P value histogram") + theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
p
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
pFilter = 0.01
plotVolcano(dea$resDE, pFilter = 0.01, fcFilter = 1) + theme_classic() +
geom_hline(yintercept = -log10(as.numeric(pFilter)), color = "brown",
linetype = "dashed") +
annotate(x = 3.0, y = -log10(as.numeric(pFilter)) - 0.20,
label = paste0("P-value = ", as.numeric(pFilter)),
geom = "text", size = 3.5, color = "brown")
plotVolcanoDEA(dea$resDE, fcFilter = 1, pFilter = 0.01, usePadj = FALSE)
dea$resDE
## # A tibble: 17,035 × 11
## ID log2FC stat pvalue padj UniprotID Gene Position Residue Sequence
## <I<c> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 s129… 3.41 27.9 1.47e-11 1.78e-7 Q07889 SOS1 1134 S PHGPRSA…
## 2 s337… 2.28 27.0 2.09e-11 1.78e-7 Q9H8W4 PLEK… 226 S CQPARSD…
## 3 s356… 2.17 23.1 1.14e-10 6.47e-7 Q9NZN5 ARHG… 341 S DTQSLVG…
## 4 s7013 2.36 20.7 3.67e-10 1.56e-6 P19634 SLC9… 703 S MSRARIG…
## 5 s110… 1.40 19.7 6.19e-10 1.99e-6 P55196 AFDN 1799 S KERQRLF…
## 6 s217… 3.79 19.5 7.02e-10 1.99e-6 Q76FK4 NOL8 268 S SITPSKS…
## 7 s315… 2.80 18.7 1.07e- 9 2.61e-6 Q9BXB5 OSBP… 209 S PCSQRHL…
## 8 s317… 3.26 18.4 1.32e- 9 2.72e-6 Q9BY89 KIAA… 1669 S SLRRSRF…
## 9 s360… 5.07 18.1 1.57e- 9 2.72e-6 Q9P246 STIM2 697 S VPKPRHT…
## 10 s242… -3.56 -18.1 1.59e- 9 2.72e-6 Q8IY63 AMOT… 906 S LSTTPAH…
## # ℹ 17,025 more rows
## # ℹ 1 more variable: site <chr>
intensityBoxPlot(se = dea$seSub, id = 's4971', symbol = "EGFR_S991")
set.seed(12345)
newSEts <- newSE[ , newSE$treatment == "HGF"]
assayMat <- SummarizedExperiment::assay(newSEts)
exprMat <- lapply(unique(newSEts$timepoint), function(tp) {
rowMedians(assayMat[,newSEts$timepoint == tp])
}) %>% bind_cols() %>% as.matrix()
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
rownames(exprMat) <- rownames(newSEts)
colnames(exprMat) <- unique(newSEts$timepoint)
sds <- apply(exprMat,1,sd)
varPer <- 80
exprMat <- exprMat[order(sds, decreasing = TRUE)[seq(1, varPer/100*nrow(exprMat))], ]
# only center when it's for expression
exprMat <- mscale(exprMat)
# remove NA values
exprMat <- exprMat[complete.cases(exprMat), ]
tsc <- clusterTS(x = exprMat, k = 5, pCut = 0.6)
tsc$plot + theme(
axis.text.x = element_text(size = 10), axis.text = element_text(color = "#000000"), )
genesetPath <- system.file("shiny-app/geneset", package = "SmartPhos")
inGMT1 <- piano::loadGSC(paste0(genesetPath, "/Cancer_Hallmark.gmt"),
type="gmt")
resTab <- enrichDifferential(dea = dea$resDE, type = "Pathway enrichment",
gsaMethod = "PAGE", geneSet = inGMT1,
statType = "stat", nPerm = 200, sigLevel = 0.05,
ifFDR = FALSE)
## Running gene set analysis:
## Checking arguments...done!
## Final gene/gene-set association: 1218 genes and 50 gene sets
## Details:
## Calculating gene set statistics from 1218 out of 4183 gene-level statistics
## Removed 3168 genes from GSC due to lack of matching gene statistics
## Removed 0 gene sets containing no genes after gene removal
## Removed additionally 0 gene sets not matching the size limits
## Loaded additional information for 50 gene sets
## Calculating gene set statistics...done!
## Calculating gene set significance...done!
## Adjusting for multiple testing...done!
resTab
## Name Gene Number Stat p.up p.up.adj
## 1 HALLMARK_MITOTIC_SPINDLE 145 3.3335 0.00042882 0.021441
## 2 HALLMARK_ALLOGRAFT_REJECTION 28 2.2395 0.01256000 0.314010
## 3 HALLMARK_APICAL_SURFACE 10 1.9283 0.02690700 0.368960
## 4 HALLMARK_UV_RESPONSE_DN 53 1.8879 0.02951700 0.368960
## 5 HALLMARK_PI3K_AKT_MTOR_SIGNALING 45 1.7815 0.03741100 0.374110
## 6 HALLMARK_PANCREAS_BETA_CELLS 2 1.6781 0.04666500 0.377200
## 7 HALLMARK_HEDGEHOG_SIGNALING 10 -2.4726 0.99329000 0.999920
## 8 HALLMARK_G2M_CHECKPOINT 116 -2.5218 0.99416000 0.999920
## 9 HALLMARK_DNA_REPAIR 45 -2.5927 0.99524000 0.999920
## 10 HALLMARK_MYC_TARGETS_V1 112 -3.1381 0.99915000 0.999920
## 11 HALLMARK_E2F_TARGETS 116 -3.7875 0.99992000 0.999920
## p.down p.down.adj Number up Number down
## 1 9.9957e-01 0.9995700 78 67
## 2 9.8744e-01 0.9995700 15 13
## 3 9.7309e-01 0.9995700 7 3
## 4 9.7048e-01 0.9995700 30 23
## 5 9.6259e-01 0.9995700 23 22
## 6 9.5333e-01 0.9995700 1 1
## 7 6.7061e-03 0.0670610 1 9
## 8 5.8377e-03 0.0670610 39 77
## 9 4.7618e-03 0.0670610 13 32
## 10 8.5030e-04 0.0212570 39 73
## 11 7.6089e-05 0.0038044 34 82
inGMT2 <- piano::loadGSC(paste0(genesetPath, "/KEGG_pathways.gmt"),
type="gmt")
clustEnr <- clusterEnrich(clusterTab = tsc$cluster, se = newSE,
inputSet = inGMT2, filterP = 0.01, ifFDR = FALSE)
clustEnr$plot + theme_classic()
First on the differential expression analysis results
netw <- getDecouplerNetwork("Homo sapiens")
scoreTab <- calcKinaseScore(dea$resDE, netw, statType = "stat", nPerm = 100)
plotKinaseDE(scoreTab, nTop = 10, pCut = 0.05)
clusterData <- tsc$cluster[tsc$cluster$cluster == "cluster1",]
allClusterFeature <- clusterData %>%
distinct(feature, .keep_all = TRUE) %>% .$feature
allClusterSite <- data.frame(rowData(newSEts)[allClusterFeature, "site"])
allClusterSite$feature <- allClusterFeature
clusterData <- clusterData %>%
left_join(allClusterSite, by = "feature") %>%
rename(site = "rowData.newSEts..allClusterFeature...site..")
scoreTab <- calcKinaseScore(clusterData, netw, statType = "stat", nPerm = 100)
plotKinaseTimeSeries(scoreTab, pCut = 0.05, clusterName = "cluster1")
newSEts <- addZeroTime(newSE, condition = "treatment", treat = "HGF",
zeroTreat = "2ndCtrl",
timeRange = c("20min","40min","100min"))
rd <- as.data.frame(rowData(newSEts))
timerange <- unique(newSEts$timepoint)
sites <- c("MET_Y1234", "MET_Y1235", "MET_1003", "Shc1_Y427", "AKT3_Y312", "RPS6KB1", "EGFR_Y1092", "EGFR_Y1172", "SOS1_S1134", "MAP2K2_S226", "MAPK3_T202", "MAPK1_T190", "EGFR_Y1197", "ERBB2_S998","GAB1_Y659","RPS6_S244")
for (i in sites) {
# condition to check if that site is present in row data
if (i %in% rd$site) {
# condition to check if assay doesn't have NAs (imputed assays are not allowed)
if (!is.na(assay(newSEts)[rownames(rd[rd$site==i,]), ][1])) {
p <- plotTimeSeries(newSEts, type = "expression",
geneID = rownames(rd[rd$site==i,]),
symbol = i, condition = "treatment",
treatment = "HGF", timerange = timerange) +
theme(axis.text = element_text(color = "#000000", size = 20),
axis.text.x = element_text(angle = 0, vjust = 0, hjust = 0.5, size = 20),
axis.title = element_text(size=20),
plot.title = element_text(size=30)) +
scale_x_continuous(breaks = c(0,20,40,60,80,100))
p$layers[[1]]$aes_params$size <- 6
p$layers[[1]]$mapping$colour <- NULL
p$layers[[1]]$aes_params$colour <- "#c1191f"
p$layers[[2]]$aes_params$linewidth <- 1
p$layers[[2]]$mapping$colour <- NULL
p$layers[[2]]$aes_params$colour <- "#555555"
print(p)
}
}
}